# 0_read_data_additional.R
# additional data management for new voting variable
# called by Analysis_incentive_adrian_OR.Rmd
# March 2022

### all remaining code below from March 2022
# Load the new vaccine table (supplied by Ray).
# NOTE(review): the code below reads `df` and `pretest_final`; presumably `df`
# comes from this .RData file -- confirm where `pretest_final` is created.
load("data/NewVaccineTable.RData")

# Keep only rows with Extra == 0 and attach a readable label for each arm;
# any Treatment value other than the three listed (including NA) becomes NA.
df2 <- df %>%
  filter(Extra == 0) %>%
  mutate(
    Treatment2 = case_when(
      Treatment == "treatment1" ~ "CDC Health Information",
      Treatment == "treatment2" ~ "Lottery Incentive",
      Treatment == "treatment3" ~ "Cash Voucher Incentive",
      TRUE ~ NA_character_
    )
  )
# Recode covariates:
#  - age / gender: friendlier names for Q2.2 / Q2.3, with age made numeric
#  - Treatment2:   factor with 'CDC Health Information' as the reference level
#  - Race:         Black / White / Other from Q2.6 (missing Q2.6 stays missing)
#  - Education:    Low / Medium / High from Q2.4 (unlisted answers become NA)
df2 <- df2 %>%
  rename(age = Q2.2, gender = Q2.3) %>%
  mutate(
    age = as.numeric(age),
    Treatment2 = relevel(factor(Treatment2), ref = "CDC Health Information"),
    Race = case_when(
      is.na(Q2.6) ~ NA_character_, # keep missing answers missing
      Q2.6 == "Black or African American" ~ "Black",
      Q2.6 == "White" ~ "White",
      TRUE ~ "Other"
    ),
    Education = case_when(
      Q2.4 %in% c(
        "None",
        "Nursery to 8th Grade",
        "Some high school",
        "High school graduate, diploma (or equivalent)"
      ) ~ "Low",
      Q2.4 %in% c(
        "Some college education, no degree",
        "Training/vocational college",
        "Associate degree"
      ) ~ "Medium",
      Q2.4 %in% c(
        "Bachelor's degree",
        "Master's degree (including professional degrees, or equivalent)",
        "Doctorate"
      ) ~ "High"
    )
  )

# Fill missing TrumpShare values with the median of the observed values
# (original note: three missing).
tmedian <- median(df2$TrumpShare, na.rm = TRUE)
df2 <- df2 %>%
  mutate(TrumpShare = ifelse(is.na(TrumpShare), tmedian, TrumpShare))

# Bring pool, state and Trumphi across from the previous data set,
# matching rows on ResponseId (rows of df2 without a match get NA).
small <- pretest_final %>%
  select(ResponseId, pool, state, Trumphi)
df2 <- df2 %>%
  left_join(small, by = "ResponseId")

# Sanity checks: compare the previous data (pretest_final) with the rebuilt
# data (df2). Nothing is assigned here; the expressions only produce output.
# NOTE(review): when a script is run through source() without echo/print.eval,
# top-level values are not auto-printed -- confirm how the Rmd calls this file.

# pool counts: previous data vs. the values joined onto df2
table(pretest_final$pool)
table(df2$pool)

# age summary: previous data vs. df2 (age was converted to numeric above)
summary(pretest_final$age)
summary(df2$age)

# previous Trumppercent vs. new TrumpShare (its NAs were median-filled above)
summary(pretest_final$Trumppercent)
summary(df2$TrumpShare)

# From here on the rebuilt data replaces pretest_final, so code that runs
# after this script keeps using the familiar object name.
pretest_final <- df2
# Debugging helpers (kept commented out):
#names(pretest_final)[(names(pretest_final) %in% names(df))==FALSE]
#names(df2)[grep('state', names(df2), ignore.case=TRUE)]

